This notebook expands the CAFI 2019 survey analysis pipeline so that it is:
Field sampling followed the June–July 2019 neighborhood survey described in the project overview. We pair focal colony measurements with CAFI community composition and physiological data to identify drivers of abundance, diversity, and taxonomic structure.
library(conflicted)
library(here)
library(tidyverse)
library(lubridate)
library(janitor)
library(glue)
library(fs)
library(vegan)
library(broom)
library(patchwork)
library(scales)
library(MASS)
library(tibble)
conflicted::conflict_prefer("filter", "dplyr", quiet = TRUE)
conflicted::conflict_prefer("lag", "dplyr", quiet = TRUE)
conflicted::conflict_prefer("select", "dplyr", quiet = TRUE)
theme_set(theme_minimal(base_size = 12))
plot_theme <- theme_minimal(base_size = 12) +
theme(
panel.grid.minor = element_blank(),
legend.position = "top",
strip.background = element_rect(fill = "#f0f0f0", color = NA),
strip.text = element_text(face = "bold")
)
morphotype_palette <- c(
"Tight branching" = "#1b9e77",
"Wide branching" = "#d95f02",
"Unknown" = "#7570b3"
)
region_palette <- c(
"MRB" = "#1b9e77",
"HAU" = "#d95f02",
"MAT" = "#7570b3"
)
survey_paths <- list(
characteristics = here("data", "Survey", "1. survey_coral_characteristics_merged_v2.csv"),
cafi = here("data", "Survey", "1. survey_cafi_data_w_taxonomy_summer2019_v5.csv"),
phys = here("data", "Survey", "1. survey_master_phys_data_v3.csv")
)
purrr::iwalk(
survey_paths,
~{
if (!fs::file_exists(.x)) {
stop(glue("Missing expected file for {.y}: {.x}"), call. = FALSE)
}
}
)
output_dir <- here("output", "survey")
figure_dir <- here(output_dir, "figures")
table_dir <- here(output_dir, "tables")
fs::dir_create(c(output_dir, figure_dir, table_dir))
characteristics_raw <- readr::read_csv(
survey_paths$characteristics,
na = c("", "NA", "NaN"),
show_col_types = FALSE
)
cafi_raw <- readr::read_csv(
survey_paths$cafi,
na = c("", "NA", "NaN"),
show_col_types = FALSE
)
phys_raw <- readr::read_csv(
survey_paths$phys,
na = c("", "NA", "NaN"),
show_col_types = FALSE
)
list(
characteristics = characteristics_raw,
cafi = cafi_raw,
physiology = phys_raw
) %>%
purrr::imap(~{
message("Rows in ", .y, ": ", nrow(.x))
invisible(NULL)
})
## $characteristics
## NULL
##
## $cafi
## NULL
##
## $physiology
## NULL
characteristics <- characteristics_raw %>%
janitor::clean_names() %>%
mutate(
sample_date = suppressWarnings(mdy(date)),
branch_architecture_raw = branch_width,
morphotype = case_when(
branch_width %in% c("tight", "tight_branching") ~ "Tight branching",
branch_width %in% c("wide", "wide_branching") ~ "Wide branching",
!is.na(morphotype) ~ str_to_sentence(morphotype),
TRUE ~ "Unknown"
),
morphotype = factor(morphotype, levels = c("Tight branching", "Wide branching", "Unknown")),
depth = readr::parse_number(as.character(depth)),
region = str_extract(site, "^[A-Z]+"),
region = factor(region, levels = c("MRB", "HAU", "MAT")),
site_block = site,
coral_volume_cm3 = coalesce(volume_lab, volume_field),
coral_circumference_cm = coalesce(circ_lab, circ_field),
coral_height_cm = coalesce(height_lab, height_field),
coral_length_cm = coalesce(length_lab, length_field),
coral_width_cm = coalesce(width_lab, width_field),
neighbor_area_m2 = pi * (2^2), # sampling radius = 2 m
number_of_neighbors = as.numeric(number_of_neighbors),
mean_neighbor_distance = as.numeric(mean_neighbor_distance),
mean_total_volume_of_neighbors = as.numeric(mean_total_volume_of_neighbors),
combined_total_volume_of_neighbors = as.numeric(combined_total_volume_of_neighbors),
mean_live_volume_of_neighbors = as.numeric(mean_live_volume_of_neighbors),
combined_live_volume_of_neighbors = as.numeric(combined_live_volume_of_neighbors),
neighbor_density = if_else(!is.na(number_of_neighbors), number_of_neighbors / neighbor_area_m2, NA_real_),
neighbor_total_volume_density = if_else(
!is.na(combined_total_volume_of_neighbors),
combined_total_volume_of_neighbors / neighbor_area_m2,
NA_real_
),
neighbor_live_volume_density = if_else(
!is.na(combined_live_volume_of_neighbors),
combined_live_volume_of_neighbors / neighbor_area_m2,
NA_real_
),
coral_isolation_index = mean_neighbor_distance,
h_substrate = as.numeric(h_substrate),
notes = na_if(notes, "")
) %>%
select(
coral_id, region, site_block, survey_type, sample_date,
morphotype, branch_architecture_raw, depth,
coral_length_cm, coral_width_cm, coral_height_cm,
coral_volume_cm3, coral_circumference_cm, h_substrate,
number_of_neighbors, neighbor_density,
mean_neighbor_distance,
mean_total_volume_of_neighbors, combined_total_volume_of_neighbors,
mean_live_volume_of_neighbors, combined_live_volume_of_neighbors,
neighbor_total_volume_density, neighbor_live_volume_density,
notes
)
glimpse(characteristics)
## Rows: 114
## Columns: 24
## $ coral_id <chr> "MRB-POC01", "MRB-POC02", "MRB-POC0…
## $ region <fct> MRB, MRB, MRB, MRB, MRB, MRB, MRB, …
## $ site_block <chr> "MRB1", "MRB1", "MRB2", "MRB2", "MR…
## $ survey_type <chr> "neighborhood", "neighborhood", "ne…
## $ sample_date <date> 2019-06-25, 2019-06-25, 2019-06-26…
## $ morphotype <fct> Tight branching, Wide branching, Wi…
## $ branch_architecture_raw <chr> "tight", "wide", "wide", "tight", "…
## $ depth <dbl> 6, 6, 6, 5, 6, 6, 7, 7, 5, 6, 6, 7,…
## $ coral_length_cm <dbl> NA, 26, NA, 17, 36, 17, 25, 24, 31,…
## $ coral_width_cm <dbl> NA, 20, NA, 15, 22, 14, 21, 22, 24,…
## $ coral_height_cm <dbl> NA, 13, NA, 11, 27, 11, 18, 18, 15,…
## $ coral_volume_cm3 <dbl> NA, 3539.528, NA, 1468.695, 11196.6…
## $ coral_circumference_cm <dbl> NA, 65, NA, 30, 83, 30, 69, 73, 81,…
## $ h_substrate <dbl> 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 1…
## $ number_of_neighbors <dbl> 22, 21, 53, 66, 24, 38, 28, 36, 14,…
## $ neighbor_density <dbl> 1.75070437, 1.67112690, 4.21760599,…
## $ mean_neighbor_distance <dbl> 137.22727, 88.85714, 113.37736, 128…
## $ mean_total_volume_of_neighbors <dbl> 22.32435, 985.23836, 113.67033, 642…
## $ combined_total_volume_of_neighbors <dbl> 491.1357, 20690.0056, 6024.5275, 42…
## $ mean_live_volume_of_neighbors <dbl> 22.31007, 976.43068, 92.82122, 387.…
## $ combined_live_volume_of_neighbors <dbl> 490.8215, 20505.0444, 4919.5247, 25…
## $ neighbor_total_volume_density <dbl> 39.08333, 1646.45833, 479.41667, 33…
## $ neighbor_live_volume_density <dbl> 39.05833, 1631.73958, 391.48333, 20…
## $ notes <chr> "slurry not retained - proper mater…
cafi <- cafi_raw %>%
janitor::clean_names() %>%
select(-matches("^(x|X)(\\d+)?$")) %>%
mutate(
coral_id = str_trim(coral_id),
site = na_if(site, ""),
extraction_method = str_replace_all(extraction_method, "_", " "),
type = str_replace_all(type, "_", " "),
type = str_to_title(type),
measurement = str_replace_all(measurement, "_", " "),
alt_measurement = str_replace_all(alt_measurement, "_", " "),
cafi_size_mm = readr::parse_number(as.character(cafi_size_mm)),
alt_size_mm = readr::parse_number(as.character(alt_size_mm)),
taxa_lowest = coalesce(
lowest_level,
if_else(!is.na(genus) & !is.na(species), str_c(genus, species, sep = " "), NA_character_),
genus,
family,
code
),
taxa_lowest = str_squish(str_replace_all(taxa_lowest, "_", " ")),
id_certainty = factor(
id_certainty,
levels = c("certain", "revision_possible", "uncertain", "unknown")
),
extraction_method = str_squish(extraction_method),
identification_notes = na_if(identification_notes, ""),
general_notes = na_if(general_notes, "")
) %>%
filter(!is.na(coral_id)) %>%
left_join(
characteristics %>%
select(coral_id, region, site_block, morphotype),
by = "coral_id"
)
glimpse(cafi)
## Rows: 3,989
## Columns: 41
## $ master_sort <dbl> 2825, 1162, 2777, 2778, 1365, 1170, 1207, 2561, 2…
## $ old_sort <dbl> 2815, 1152, 2767, 2768, 1355, 1160, 1197, 2551, 2…
## $ coral_id <chr> "HAU-POC29", "HAU-POC13", "HAU-POC25", "HAU-POC25…
## $ site <chr> NA, "HAU04", NA, NA, "MAT01", "HAU05", "HAU05", N…
## $ code <chr> "ACHI", "ALAE", "ALAE", "ALAE", "ALCL", "ALCO", "…
## $ type <chr> "Crab", "Shrimp", "Shrimp", "Shrimp", "Shrimp", "…
## $ search_term <chr> "Actaeodes hirsutissimus", "Alpheopsis aequalis",…
## $ lowest_level <chr> "species", "species", "species", "species", "spec…
## $ cafi_size_mm <dbl> 10, 9, 8, 10, 14, 16, 11, 13, 15, 16, 16, 15, 17,…
## $ measurement <chr> "carapace width", "rostrum to tail", "rostrum to …
## $ alt_size_mm <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ alt_measurement <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ extraction_method <chr> "clove", "clove", "clove", "clove", "clove", "clo…
## $ photo_num <chr> "Unknown Crab 150", "Unknown Shrimp 367", "Unknow…
## $ id_certainty <fct> certain, revision_possible, revision_possible, re…
## $ general_notes <chr> NA, "gravid", NA, NA, NA, NA, "in post smash fold…
## $ identification_notes <chr> "Black setae on carapace, big claws with white sp…
## $ revised_yn <chr> "y", "y", "y", "y", "y", "y", "y", NA, NA, NA, NA…
## $ revision_date <chr> "5/22/2020", "30-Oct; 2020-05-11", "31-Oct", "31-…
## $ identified_by <chr> "JC", "JC", "AP", "AP", "JC", "AP", "AP", NA, NA,…
## $ revised_by <chr> "JC", "AP; JC; JC", "AP", "AP", "AP; JC; JC", "AP…
## $ previous_name <chr> "pizza_crab", "tiger_alpheid; ALPC", "possible_AT…
## $ worms_id <chr> "209051", "220235", "220235", "220235", "210529",…
## $ phylum <chr> "Arthropoda", "Arthropoda", "Arthropoda", "Arthro…
## $ subphylum <chr> "Crustacea", "Crustacea", "Crustacea", "Crustacea…
## $ superclass <chr> "Multicrustacea", "Multicrustacea", "Multicrustac…
## $ class <chr> "Malacostraca", "Malacostraca", "Malacostraca", "…
## $ subclass <chr> "Eumalacostraca", "Eumalacostraca", "Eumalacostra…
## $ superorder <chr> "Eucarida", "Eucarida", "Eucarida", "Eucarida", "…
## $ order <chr> "Decapoda", "Decapoda", "Decapoda", "Decapoda", "…
## $ suborder <chr> "Pleocyemata", "Pleocyemata", "Pleocyemata", "Ple…
## $ infraorder <chr> "Brachyura", "Caridea", "Caridea", "Caridea", "Ca…
## $ superfamily <chr> "Xanthoidea", "Alpheoidea", "Alpheoidea", "Alpheo…
## $ family <chr> "Xanthidae", "Alpheidae", "Alpheidae", "Alpheidae…
## $ subfamily <chr> "Actaeinae", NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ genus <chr> "Actaeodes", "Alpheopsis", "Alpheopsis", "Alpheop…
## $ species <chr> "Actaeodes hirsutissimus", "Alpheopsis aequalis",…
## $ taxa_lowest <chr> "species", "species", "species", "species", "spec…
## $ region <fct> HAU, HAU, HAU, HAU, MAT, HAU, HAU, HAU, HAU, HAU,…
## $ site_block <chr> "HAU-S2", "HAU4", "HAU-S1", "HAU-S1", "MAT1", "HA…
## $ morphotype <fct> Wide branching, Wide branching, Wide branching, W…
numeric_phys_cols <- c(
"slurry_volume", "dry_weight", "dip_weight", "wax_weight", "surface_area",
"protein_absorbance", "protein_absorbance_se", "protein_mg_ml", "protein_se",
"carb_absorbance", "carb_absorbance_se", "carb_mg_ml", "carb_se",
"avg_num_zoox_cells", "num_zoox_cells_se",
"burned_pan", "burned_pan_se", "burned_pan_dry_res", "burned_pan_dry_res_se",
"dry_tissue", "dry_tissue_se", "burned_pan_burned_res", "burned_pan_burned_res_se",
"afdw_g_ml", "afdw_se",
"protein_mg_cm2", "carb_mg_cm2", "zoox_density", "zoox_cells_cm2", "afdw_mg_cm2",
"protein_mg_cm2_se", "carb_mg_cm2_se", "zoox_density_se", "zoox_cells_cm2_se",
"afdw_mg_cm2_se"
)
physiology <- phys_raw %>%
janitor::clean_names() %>%
select(-matches("^(x|X)(\\d+)?$")) %>%
mutate(
slurry_date = suppressWarnings(mdy(slurry_date)),
dna_collection_date = suppressWarnings(mdy(dna_collection_date)),
branch_width = str_replace_all(branch_width, "_", " ")
) %>%
mutate(
across(
any_of(numeric_phys_cols),
~suppressWarnings(as.numeric(.x))
)
)
phys_summary <- physiology %>%
group_by(coral_id) %>%
summarise(
n_tissue_samples = n(),
mean_surface_area_cm2 = mean(surface_area, na.rm = TRUE),
mean_protein_mg_cm2 = mean(protein_mg_cm2, na.rm = TRUE),
mean_carb_mg_cm2 = mean(carb_mg_cm2, na.rm = TRUE),
mean_zoox_density_cells_ml = mean(zoox_density, na.rm = TRUE),
mean_zoox_cells_cm2 = mean(zoox_cells_cm2, na.rm = TRUE),
mean_afdw_mg_cm2 = mean(afdw_mg_cm2, na.rm = TRUE),
.groups = "drop"
)
glimpse(phys_summary)
## Rows: 108
## Columns: 8
## $ coral_id <chr> "HAU-POC01", "HAU-POC02", "HAU-POC03", "HAU…
## $ n_tissue_samples <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ mean_surface_area_cm2 <dbl> 6615.96, 6214.22, 6547.74, 8961.97, 2045.22…
## $ mean_protein_mg_cm2 <dbl> 0.0004162661, NaN, 0.0031613961, 0.00385897…
## $ mean_carb_mg_cm2 <dbl> 0.0005019176, NaN, 0.0004378101, 0.00060797…
## $ mean_zoox_density_cells_ml <dbl> NaN, 326666.7, 1415000.0, 1083333.3, 163666…
## $ mean_zoox_cells_cm2 <dbl> NaN, 683.3789, 5402.6275, 2659.3855, 1200.3…
## $ mean_afdw_mg_cm2 <dbl> 0.03687296, 0.06234089, 0.04429009, 0.03645…
cafi_counts <- cafi %>%
filter(!is.na(taxa_lowest)) %>%
group_by(coral_id, taxa_lowest) %>%
summarise(
individuals = n(),
mean_size_mm = mean(cafi_size_mm, na.rm = TRUE),
size_sum_mm = if_else(
all(is.na(cafi_size_mm)),
NA_real_,
sum(cafi_size_mm, na.rm = TRUE)
),
.groups = "drop"
)
community_matrix_tbl <- cafi_counts %>%
select(coral_id, taxa_lowest, individuals) %>%
pivot_wider(
names_from = taxa_lowest,
values_from = individuals,
values_fill = 0
) %>%
arrange(coral_id)
community_matrix <- community_matrix_tbl %>%
column_to_rownames(var = "coral_id") %>%
as.matrix()
total_individuals_vec <- rowSums(community_matrix)
richness_vec <- rowSums(community_matrix > 0)
shannon_vec <- vegan::diversity(community_matrix, index = "shannon")
simpson_vec <- vegan::diversity(community_matrix, index = "simpson")
evenness_vec <- if_else(richness_vec > 0, shannon_vec / log(richness_vec), NA_real_)
diversity_df <- tibble(
coral_id = rownames(community_matrix),
total_individuals = as.integer(total_individuals_vec),
species_richness = richness_vec,
shannon = shannon_vec,
simpson = simpson_vec,
pielou_evenness = evenness_vec
) %>%
arrange(coral_id)
cafi_size_summary <- cafi %>%
group_by(coral_id) %>%
summarise(
mean_cafi_size_mm = mean(cafi_size_mm, na.rm = TRUE),
median_cafi_size_mm = median(cafi_size_mm, na.rm = TRUE),
size_sd_mm = sd(cafi_size_mm, na.rm = TRUE),
.groups = "drop"
)
cafi_type_summary <- cafi %>%
filter(!is.na(type)) %>%
mutate(type_clean = str_replace_all(str_to_lower(type), "[^a-z0-9]+", "_")) %>%
count(coral_id, type_clean, name = "n") %>%
mutate(type_clean = str_c("n_type_", type_clean)) %>%
pivot_wider(
names_from = type_clean,
values_from = n,
values_fill = 0
)
diversity_df
survey_analytic_df <- characteristics %>%
left_join(diversity_df, by = "coral_id") %>%
left_join(cafi_size_summary, by = "coral_id") %>%
left_join(cafi_type_summary, by = "coral_id") %>%
left_join(phys_summary, by = "coral_id") %>%
mutate(
total_individuals = replace_na(total_individuals, 0L),
species_richness = replace_na(species_richness, 0),
shannon = replace_na(shannon, 0),
simpson = replace_na(simpson, 0),
pielou_evenness = replace_na(pielou_evenness, 0),
mean_cafi_size_mm = if_else(is.nan(mean_cafi_size_mm), NA_real_, mean_cafi_size_mm),
median_cafi_size_mm = if_else(is.nan(median_cafi_size_mm), NA_real_, median_cafi_size_mm),
size_sd_mm = if_else(is.nan(size_sd_mm), NA_real_, size_sd_mm),
combined_total_volume_of_neighbors = replace_na(combined_total_volume_of_neighbors, 0),
combined_live_volume_of_neighbors = replace_na(combined_live_volume_of_neighbors, 0),
mean_total_volume_of_neighbors = replace_na(mean_total_volume_of_neighbors, 0),
mean_live_volume_of_neighbors = replace_na(mean_live_volume_of_neighbors, 0),
number_of_neighbors = replace_na(number_of_neighbors, 0),
neighbor_density = replace_na(neighbor_density, 0),
neighbor_total_volume_density = replace_na(neighbor_total_volume_density, 0),
neighbor_live_volume_density = replace_na(neighbor_live_volume_density, 0),
coral_volume_cm3 = replace_na(coral_volume_cm3, 0),
log_coral_volume = log10(coral_volume_cm3 + 1),
log_total_individuals = log10(total_individuals + 1),
log_live_neighbor_volume = log10(combined_live_volume_of_neighbors + 1),
log_neighbor_density = log10(neighbor_density + 1e-6)
) %>%
mutate(across(starts_with("n_type_"), ~replace_na(.x, 0L)))
glimpse(survey_analytic_df)
## Rows: 114
## Columns: 53
## $ coral_id <chr> "MRB-POC01", "MRB-POC02", "MRB-POC0…
## $ region <fct> MRB, MRB, MRB, MRB, MRB, MRB, MRB, …
## $ site_block <chr> "MRB1", "MRB1", "MRB2", "MRB2", "MR…
## $ survey_type <chr> "neighborhood", "neighborhood", "ne…
## $ sample_date <date> 2019-06-25, 2019-06-25, 2019-06-26…
## $ morphotype <fct> Tight branching, Wide branching, Wi…
## $ branch_architecture_raw <chr> "tight", "wide", "wide", "tight", "…
## $ depth <dbl> 6, 6, 6, 5, 6, 6, 7, 7, 5, 6, 6, 7,…
## $ coral_length_cm <dbl> NA, 26, NA, 17, 36, 17, 25, 24, 31,…
## $ coral_width_cm <dbl> NA, 20, NA, 15, 22, 14, 21, 22, 24,…
## $ coral_height_cm <dbl> NA, 13, NA, 11, 27, 11, 18, 18, 15,…
## $ coral_volume_cm3 <dbl> 0.000, 3539.528, 0.000, 1468.695, 1…
## $ coral_circumference_cm <dbl> NA, 65, NA, 30, 83, 30, 69, 73, 81,…
## $ h_substrate <dbl> 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 1…
## $ number_of_neighbors <dbl> 22, 21, 53, 66, 24, 38, 28, 36, 14,…
## $ neighbor_density <dbl> 1.75070437, 1.67112690, 4.21760599,…
## $ mean_neighbor_distance <dbl> 137.22727, 88.85714, 113.37736, 128…
## $ mean_total_volume_of_neighbors <dbl> 22.32435, 985.23836, 113.67033, 642…
## $ combined_total_volume_of_neighbors <dbl> 491.1357, 20690.0056, 6024.5275, 42…
## $ mean_live_volume_of_neighbors <dbl> 22.31007, 976.43068, 92.82122, 387.…
## $ combined_live_volume_of_neighbors <dbl> 490.8215, 20505.0444, 4919.5247, 25…
## $ neighbor_total_volume_density <dbl> 39.08333, 1646.45833, 479.41667, 33…
## $ neighbor_live_volume_density <dbl> 39.05833, 1631.73958, 391.48333, 20…
## $ notes <chr> "slurry not retained - proper mater…
## $ total_individuals <int> 13, 30, 42, 17, 58, 39, 37, 27, 30,…
## $ species_richness <dbl> 3, 5, 4, 3, 6, 5, 4, 4, 2, 3, 4, 4,…
## $ shannon <dbl> 0.6870920, 0.8661933, 0.8551727, 0.…
## $ simpson <dbl> 0.37869822, 0.43111111, 0.48412698,…
## $ pielou_evenness <dbl> 0.6254181, 0.5381961, 0.6168767, 0.…
## $ mean_cafi_size_mm <dbl> 37.500000, 30.000000, NA, NA, NA, N…
## $ median_cafi_size_mm <dbl> 37.50, 30.00, NA, NA, NA, NA, 60.00…
## $ size_sd_mm <dbl> 3.535534, NA, NA, NA, NA, NA, NA, N…
## $ n_type_crab <int> 2, 9, 6, 7, 21, 4, 7, 12, 10, 14, 1…
## $ n_type_fish <int> 2, 3, 0, 0, 4, 0, 2, 4, 4, 6, 4, 2,…
## $ n_type_shrimp <int> 7, 14, 32, 8, 23, 28, 24, 7, 11, 14…
## $ n_type_snail <int> 0, 2, 2, 0, 3, 2, 3, 2, 1, 1, 2, 8,…
## $ n_type_squat_lobster <int> 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0,…
## $ n_type_echinoderm <int> 1, 0, 0, 0, 0, 2, 1, 0, 0, 2, 3, 7,…
## $ n_type_hermit <int> 0, 1, 1, 0, 2, 2, 0, 2, 4, 10, 3, 6…
## $ n_type_worm <int> 1, 1, 1, 2, 3, 0, 0, 0, 0, 1, 0, 0,…
## $ n_type_amphipod <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ n_type_amph <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,…
## $ n_tissue_samples <int> NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ mean_surface_area_cm2 <dbl> NA, 2181.660, 3428.570, 2387.836, 6…
## $ mean_protein_mg_cm2 <dbl> NA, 0.0065042216, 0.0055278148, 0.0…
## $ mean_carb_mg_cm2 <dbl> NA, 0.0008617291, 0.0008181253, 0.0…
## $ mean_zoox_density_cells_ml <dbl> NA, 223000.0, 1026666.7, 431666.7, …
## $ mean_zoox_cells_cm2 <dbl> NA, 3066.4723, 6737.5028, 2802.0489…
## $ mean_afdw_mg_cm2 <dbl> NA, 0.13819752, 0.05282815, 0.08146…
## $ log_coral_volume <dbl> 0.000000, 3.549068, 0.000000, 3.167…
## $ log_total_individuals <dbl> 1.146128, 1.491362, 1.633468, 1.255…
## $ log_live_neighbor_volume <dbl> 2.691808, 4.311882, 3.692011, 4.407…
## $ log_neighbor_density <dbl> 0.24321306, 0.22300969, 0.62506611,…
colony_summary <- survey_analytic_df %>%
group_by(region, morphotype) %>%
summarise(
n_colonies = n(),
median_volume_cm3 = median(coral_volume_cm3, na.rm = TRUE),
iqr_volume_cm3 = IQR(coral_volume_cm3, na.rm = TRUE),
median_neighbors = median(number_of_neighbors, na.rm = TRUE),
median_total_individuals = median(total_individuals, na.rm = TRUE),
q1_richness = quantile(species_richness, 0.25, na.rm = TRUE),
median_richness = median(species_richness, na.rm = TRUE),
q3_richness = quantile(species_richness, 0.75, na.rm = TRUE),
.groups = "drop"
)
knitr::kable(
colony_summary,
caption = "Focal colony context by region and morphotype"
)
| region | morphotype | n_colonies | median_volume_cm3 | iqr_volume_cm3 | median_neighbors | median_total_individuals | q1_richness | median_richness | q3_richness |
|---|---|---|---|---|---|---|---|---|---|
| MRB | Tight branching | 19 | 2450.442 | 3767.817 | 15.0 | 37.0 | 2 | 3 | 4.00 |
| MRB | Wide branching | 18 | 3932.750 | 6094.035 | 8.5 | 34.0 | 2 | 3 | 3.75 |
| HAU | Tight branching | 20 | 4063.126 | 4822.345 | 1.5 | 21.0 | 2 | 3 | 3.00 |
| HAU | Wide branching | 18 | 6168.517 | 9861.852 | 1.5 | 22.0 | 2 | 3 | 3.00 |
| MAT | Tight branching | 20 | 4368.908 | 3924.635 | 4.0 | 27.5 | 2 | 3 | 4.00 |
| MAT | Wide branching | 19 | 3979.351 | 5596.747 | 4.0 | 21.0 | 2 | 3 | 3.00 |
p_abundance_region <- survey_analytic_df %>%
ggplot(aes(x = region, y = total_individuals, fill = morphotype)) +
geom_boxplot(alpha = 0.8, outlier.shape = 21, outlier.fill = "white") +
scale_fill_manual(values = morphotype_palette, na.translate = FALSE) +
scale_y_log10(labels = label_number(big.mark = ",")) +
labs(
x = "Backreef region",
y = "Total CAFI individuals (log10 scale)",
fill = "Morphotype",
title = "CAFI abundance varies across regions and morphotypes"
) +
plot_theme
p_abundance_neighbor <- survey_analytic_df %>%
ggplot(aes(x = combined_live_volume_of_neighbors + 1, y = total_individuals, color = region)) +
geom_point(alpha = 0.75, size = 2.5) +
geom_smooth(method = "lm", se = TRUE, color = "black") +
scale_color_manual(values = region_palette) +
scale_x_log10(labels = label_number(scale_cut = cut_short_scale())) +
scale_y_log10(labels = label_number(scale_cut = cut_short_scale())) +
facet_wrap(~ morphotype) +
labs(
x = "Live neighbor Pocillopora volume (cm³, log scale)",
y = "Total CAFI individuals (log scale)",
color = "Region",
title = "Higher live Pocillopora neighborhood volume is linked to richer CAFI communities"
) +
plot_theme
p_abundance_region + p_abundance_neighbor + plot_layout(guides = "collect")
ggsave(
filename = here(figure_dir, "cafi_abundance_regional_patterns.png"),
plot = p_abundance_region,
width = 7,
height = 5,
dpi = 300
)
ggsave(
filename = here(figure_dir, "cafi_abundance_vs_neighbor_volume.png"),
plot = p_abundance_neighbor,
width = 7,
height = 5,
dpi = 300
)
p_diversity <- survey_analytic_df %>%
ggplot(aes(x = morphotype, y = shannon, fill = region)) +
geom_boxplot(alpha = 0.9, outlier.shape = NA, width = 0.7) +
geom_jitter(
aes(color = region),
width = 0.15,
alpha = 0.6,
size = 2
) +
scale_fill_manual(values = region_palette) +
scale_color_manual(values = region_palette) +
labs(
x = "Morphotype",
y = "Shannon diversity",
title = "CAFI diversity by morphotype and region"
) +
plot_theme
p_evenness_volume <- survey_analytic_df %>%
ggplot(aes(x = coral_volume_cm3 + 1, y = pielou_evenness, color = morphotype)) +
geom_point(alpha = 0.75, size = 2.5) +
geom_smooth(method = "loess", se = TRUE) +
scale_color_manual(values = morphotype_palette) +
scale_x_log10(labels = label_number(scale_cut = cut_short_scale())) +
labs(
x = "Coral colony volume (cm³, log scale)",
y = "Pielou's evenness",
color = "Morphotype",
title = "Evenness is highest on intermediate-sized colonies"
) +
plot_theme
p_diversity + p_evenness_volume + plot_layout(guides = "collect")
ggsave(
filename = here(figure_dir, "cafi_diversity_summaries.png"),
plot = p_diversity + p_evenness_volume + plot_layout(guides = "collect"),
width = 10,
height = 5,
dpi = 300
)
type_composition <- cafi %>%
filter(!is.na(type)) %>%
mutate(
type_group = fct_lump_n(type, n = 8, other_level = "Other"),
type_group = fct_reorder(type_group, type_group, .fun = length),
region = fct_drop(region)
) %>%
count(region, morphotype, type_group, name = "n") %>%
group_by(region, morphotype) %>%
mutate(
prop = n / sum(n)
) %>%
ungroup()
p_type_composition <- type_composition %>%
ggplot(aes(x = region, y = prop, fill = type_group)) +
geom_col(position = "stack", alpha = 0.9) +
facet_wrap(~ morphotype, nrow = 1) +
scale_y_continuous(labels = label_percent(accuracy = 1)) +
labs(
x = "Region",
y = "Community share",
fill = "Functional group",
title = "CAFI functional composition differs by site and morphotype"
) +
plot_theme
p_type_composition
ggsave(
filename = here(figure_dir, "cafi_functional_composition.png"),
plot = p_type_composition,
width = 9,
height = 5,
dpi = 300
)
count_model_df <- survey_analytic_df %>%
filter(
coral_volume_cm3 > 0,
combined_live_volume_of_neighbors >= 0,
!is.na(morphotype),
!is.na(region)
) %>%
mutate(
scaled_log_coral_volume = as.numeric(scale(log10(coral_volume_cm3))),
scaled_log_live_neighbor_volume = as.numeric(scale(log10(combined_live_volume_of_neighbors + 1))),
scaled_neighbor_density = as.numeric(scale(neighbor_density))
)
count_model <- MASS::glm.nb(
total_individuals ~ region + morphotype +
scaled_log_coral_volume +
scaled_log_live_neighbor_volume +
scaled_neighbor_density,
data = count_model_df
)
count_model_tidy <- broom::tidy(
count_model,
exponentiate = TRUE,
conf.int = TRUE
) %>%
mutate(across(where(is.numeric), ~round(.x, 3)))
count_model_glance <- broom::glance(count_model)
knitr::kable(
count_model_tidy,
caption = "Negative binomial model of CAFI abundance (rate ratios)"
)
| term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| (Intercept) | 38.144 | 0.107 | 33.925 | 0.000 | 31.016 | 47.278 |
| regionHAU | 0.734 | 0.137 | -2.255 | 0.024 | 0.561 | 0.960 |
| regionMAT | 0.845 | 0.134 | -1.256 | 0.209 | 0.648 | 1.102 |
| morphotypeWide branching | 0.738 | 0.102 | -2.972 | 0.003 | 0.605 | 0.901 |
| scaled_log_coral_volume | 2.113 | 0.057 | 13.042 | 0.000 | 1.889 | 2.366 |
| scaled_log_live_neighbor_volume | 0.952 | 0.073 | -0.680 | 0.497 | 0.820 | 1.105 |
| scaled_neighbor_density | 1.061 | 0.075 | 0.796 | 0.426 | 0.902 | 1.260 |
diversity_model_df <- survey_analytic_df %>%
filter(
coral_volume_cm3 > 0,
!is.na(morphotype),
!is.na(region),
!is.na(mean_protein_mg_cm2)
) %>%
mutate(
scaled_log_coral_volume = as.numeric(scale(log10(coral_volume_cm3))),
scaled_protein = as.numeric(scale(mean_protein_mg_cm2)),
scaled_log_live_neighbor_volume = as.numeric(scale(log10(combined_live_volume_of_neighbors + 1)))
)
diversity_model <- lm(
shannon ~ region + morphotype +
scaled_log_coral_volume +
scaled_log_live_neighbor_volume +
scaled_protein,
data = diversity_model_df
)
diversity_model_tidy <- broom::tidy(
diversity_model,
conf.int = TRUE
) %>%
mutate(across(where(is.numeric), ~round(.x, 3)))
diversity_model_glance <- broom::glance(diversity_model)
knitr::kable(
diversity_model_tidy,
caption = "Linear model of Shannon diversity"
)
| term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| (Intercept) | 0.531 | 0.063 | 8.435 | 0.000 | 0.406 | 0.656 |
| regionHAU | -0.072 | 0.073 | -0.983 | 0.328 | -0.218 | 0.074 |
| regionMAT | -0.096 | 0.074 | -1.297 | 0.198 | -0.242 | 0.051 |
| morphotypeWide branching | 0.053 | 0.064 | 0.827 | 0.410 | -0.074 | 0.179 |
| scaled_log_coral_volume | 0.064 | 0.031 | 2.095 | 0.039 | 0.003 | 0.125 |
| scaled_log_live_neighbor_volume | 0.006 | 0.031 | 0.188 | 0.851 | -0.055 | 0.066 |
| scaled_protein | 0.043 | 0.032 | 1.333 | 0.186 | -0.021 | 0.107 |
valid_rows <- total_individuals_vec > 0
community_for_ord <- community_matrix[valid_rows, ]
nmds_df <- survey_analytic_df %>%
filter(coral_id %in% rownames(community_for_ord)) %>%
select(coral_id, region, morphotype, log_coral_volume, log_live_neighbor_volume) %>%
mutate(
scaled_log_coral_volume = as.numeric(scale(log_coral_volume)),
scaled_log_live_neighbor_volume = as.numeric(scale(log_live_neighbor_volume))
) %>%
column_to_rownames("coral_id")
community_for_ord <- community_for_ord[rownames(nmds_df), ]
set.seed(2025)
nmds_fit <- vegan::metaMDS(
community_for_ord,
distance = "bray",
k = 2,
trymax = 200,
autotransform = FALSE
)
## Run 0 stress 0.08522801
## Run 1 stress 0.09962531
## Run 2 stress 0.09124473
## Run 3 stress 0.104316
## Run 4 stress 0.09165996
## Run 5 stress 0.09136372
## Run 6 stress 0.09618744
## Run 7 stress 0.0996444
## Run 8 stress 0.1007685
## Run 9 stress 0.08625413
## Run 10 stress 0.07103067
## ... New best solution
## ... Procrustes: rmse 0.01563212 max resid 0.09477906
## Run 11 stress 0.07100285
## ... New best solution
## ... Procrustes: rmse 0.003218389 max resid 0.0236027
## Run 12 stress 0.0974593
## Run 13 stress 0.08604393
## Run 14 stress 0.0793779
## Run 15 stress 0.09241526
## Run 16 stress 0.09576412
## Run 17 stress 0.08490274
## Run 18 stress 0.06944839
## ... New best solution
## ... Procrustes: rmse 0.01578052 max resid 0.1402104
## Run 19 stress 0.08406536
## Run 20 stress 0.09809897
## Run 21 stress 0.09611493
## Run 22 stress 0.08785304
## Run 23 stress 0.09649643
## Run 24 stress 0.08864346
## Run 25 stress 0.07801675
## Run 26 stress 0.09536803
## Run 27 stress 0.08590269
## Run 28 stress 0.09010298
## Run 29 stress 0.0967086
## Run 30 stress 0.07057654
## Run 31 stress 0.09348961
## Run 32 stress 0.09878032
## Run 33 stress 0.09292848
## Run 34 stress 0.07057684
## Run 35 stress 0.07974323
## Run 36 stress 0.09274154
## Run 37 stress 0.09243711
## Run 38 stress 0.0992964
## Run 39 stress 0.09365048
## Run 40 stress 0.08231855
## Run 41 stress 0.08353018
## Run 42 stress 0.08818121
## Run 43 stress 0.09802528
## Run 44 stress 0.09243676
## Run 45 stress 0.07801679
## Run 46 stress 0.07927319
## Run 47 stress 0.07937816
## Run 48 stress 0.09645109
## Run 49 stress 0.1009254
## Run 50 stress 0.08094626
## Run 51 stress 0.1057153
## Run 52 stress 0.08349488
## Run 53 stress 0.09668046
## Run 54 stress 0.09669814
## Run 55 stress 0.07057662
## Run 56 stress 0.09409087
## Run 57 stress 0.07969821
## Run 58 stress 0.07974273
## Run 59 stress 0.08778661
## Run 60 stress 0.08094586
## Run 61 stress 0.07100226
## Run 62 stress 0.10258
## Run 63 stress 0.09125524
## Run 64 stress 0.09169144
## Run 65 stress 0.09942895
## Run 66 stress 0.09188527
## Run 67 stress 0.0900846
## Run 68 stress 0.09573424
## Run 69 stress 0.07794443
## Run 70 stress 0.1016823
## Run 71 stress 0.07057662
## Run 72 stress 0.09234937
## Run 73 stress 0.09833703
## Run 74 stress 0.08546521
## Run 75 stress 0.08920936
## Run 76 stress 0.1022597
## Run 77 stress 0.09627738
## Run 78 stress 0.08984352
## Run 79 stress 0.08203874
## Run 80 stress 0.08934162
## Run 81 stress 0.06944183
## ... New best solution
## ... Procrustes: rmse 0.0008549897 max resid 0.00476795
## ... Similar to previous best
## *** Best solution repeated 1 times
nmds_scores <- vegan::scores(nmds_fit, display = "sites") %>%
as_tibble(rownames = "coral_id") %>%
left_join(
survey_analytic_df %>%
select(coral_id, region, morphotype, log_coral_volume, log_live_neighbor_volume),
by = "coral_id"
)
p_nmds <- nmds_scores %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = region, shape = morphotype, size = log_coral_volume)) +
geom_point(alpha = 0.85) +
scale_color_manual(values = region_palette) +
scale_size_continuous(range = c(2, 6)) +
coord_equal() +
labs(
x = "NMDS axis 1",
y = "NMDS axis 2",
color = "Region",
shape = "Morphotype",
size = "log10 colony volume",
title = "NMDS ordination of CAFI community composition (Bray-Curtis)"
) +
plot_theme
p_nmds
ggsave(
filename = here(figure_dir, "cafi_ordination_nmds.png"),
plot = p_nmds,
width = 7,
height = 6,
dpi = 300
)
permanova_df <- survey_analytic_df %>%
filter(coral_id %in% rownames(community_for_ord)) %>%
mutate(
scaled_log_coral_volume = as.numeric(scale(log_coral_volume)),
scaled_log_live_neighbor_volume = as.numeric(scale(log_live_neighbor_volume))
) %>%
drop_na(region, morphotype, scaled_log_coral_volume, scaled_log_live_neighbor_volume) %>%
select(coral_id, region, morphotype, scaled_log_coral_volume, scaled_log_live_neighbor_volume) %>%
column_to_rownames("coral_id")
community_for_perm <- community_for_ord[rownames(permanova_df), ]
if (nrow(permanova_df) >= 3) {
set.seed(2025)
permanova <- vegan::adonis2(
community_for_perm ~ region + morphotype +
scaled_log_coral_volume + scaled_log_live_neighbor_volume,
data = permanova_df,
permutations = 999,
method = "bray"
)
permanova_tbl <- permanova %>%
as.data.frame() %>%
rownames_to_column("term")
knitr::kable(
permanova_tbl,
digits = 3,
caption = "PERMANOVA results for CAFI community structure"
)
} else {
permanova_tbl <- tibble(
term = character(),
SumOfSqs = numeric(),
R2 = numeric(),
F = numeric(),
Pr = numeric()
)
message("PERMANOVA skipped: fewer than 3 colonies with complete data.")
}
| term | Df | SumOfSqs | R2 | F | Pr(>F) |
|---|---|---|---|---|---|
| Model | 5 | 4.754 | 0.292 | 8.898 | 0.001 |
| Residual | 108 | 11.541 | 0.708 | NA | NA |
| Total | 113 | 16.295 | 1.000 | NA | NA |
readr::write_csv(
survey_analytic_df,
here(table_dir, "survey_analytic_dataset.csv")
)
readr::write_csv(
colony_summary,
here(table_dir, "colony_summary_by_region_morphotype.csv")
)
readr::write_csv(
count_model_tidy,
here(table_dir, "model_cafi_abundance_nb.csv")
)
readr::write_csv(
diversity_model_tidy,
here(table_dir, "model_shannon_linear.csv")
)
readr::write_csv(
permanova_tbl,
here(table_dir, "permanova_cafi_composition.csv")
)
readr::write_csv(
type_composition,
here(table_dir, "cafi_functional_composition_by_region.csv")
)
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.0.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] C.UTF-8/C.UTF-8/C.UTF-8/C/C.UTF-8/C.UTF-8
##
## time zone: America/Los_Angeles
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] MASS_7.3-65 scales_1.4.0 patchwork_1.3.1 broom_1.0.9
## [5] vegan_2.7-1 permute_0.9-8 fs_1.6.6 glue_1.8.0
## [9] janitor_2.2.1 lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1
## [13] dplyr_1.1.4 purrr_1.1.0 readr_2.1.5 tidyr_1.3.1
## [17] tibble_3.3.0 ggplot2_3.5.2 tidyverse_2.0.0 here_1.0.1
## [21] conflicted_1.2.0
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 xfun_0.53 bslib_0.9.0 lattice_0.22-7
## [5] tzdb_0.5.0 vctrs_0.6.5 tools_4.5.1 generics_0.1.4
## [9] parallel_4.5.1 cluster_2.1.8.1 pkgconfig_2.0.3 Matrix_1.7-3
## [13] RColorBrewer_1.1-3 lifecycle_1.0.4 compiler_4.5.1 farver_2.1.2
## [17] textshaping_1.0.1 snakecase_0.11.1 htmltools_0.5.8.1 sass_0.4.10
## [21] yaml_2.3.10 pillar_1.11.0 crayon_1.5.3 jquerylib_0.1.4
## [25] cachem_1.1.0 nlme_3.1-168 tidyselect_1.2.1 digest_0.6.37
## [29] stringi_1.8.7 labeling_0.4.3 splines_4.5.1 rprojroot_2.1.0
## [33] fastmap_1.2.0 grid_4.5.1 cli_3.6.5 magrittr_2.0.3
## [37] withr_3.0.2 backports_1.5.0 bit64_4.6.0-1 timechange_0.3.0
## [41] rmarkdown_2.29 bit_4.6.0 ragg_1.4.0 hms_1.1.3
## [45] memoise_2.0.1 evaluate_1.0.4 knitr_1.50 mgcv_1.9-3
## [49] rlang_1.1.6 vroom_1.6.5 jsonlite_2.0.0 R6_2.6.1
## [53] systemfonts_1.2.3